Notes:
Notes:
library(ggplot2)
# Read the tab-separated pseudo-Facebook data set into `pf`.
pf <- read.csv("pseudo_facebook.tsv", sep = "\t")

# Scatterplot of friend_count against age. Written as ggplot() + geom_point()
# instead of qplot(): qplot() is deprecated in current ggplot2 releases, and
# every other plot in this file already uses the ggplot() form.
ggplot(data = pf, mapping = aes(x = age, y = friend_count)) +
  geom_point()
Response: younger users have more friends
Notes:
# friend_count against age, drawn with jittered points at 5% opacity and the
# x axis limited to ages 13 through 90.
ggplot(data = pf, mapping = aes(x = age, y = friend_count)) +
  geom_jitter(alpha = 0.05) +
  scale_x_continuous(limits = c(13, 90))
## Warning: Removed 5200 rows containing missing values (geom_point).
Notes:
# Same jittered scatterplot as before: 5% opacity points, ages 13 to 90.
ggplot(pf, aes(x = age, y = friend_count)) +
  geom_jitter(alpha = 0.05) +
  xlim(c(13, 90))
## Warning: Removed 5197 rows containing missing values (geom_point).
Response:
Notes:
# friend_count against age with un-jittered points at 5% opacity. The y axis
# is displayed on a square-root scale via coord_trans(); x covers ages 13-90.
ggplot(data = pf, mapping = aes(x = age, y = friend_count)) +
  geom_point(alpha = 0.05) +
  coord_trans(y = "sqrt") +
  xlim(13, 90)
## Warning: Removed 4906 rows containing missing values (geom_point).
Notes:
# friendships_initiated against age: points at 1/60 opacity, square-root
# y axis, x axis limited to ages 13 through 90.
ggplot(data = pf, mapping = aes(x = age, y = friendships_initiated)) +
  geom_point(alpha = 1 / 60) +
  coord_trans(y = "sqrt") +
  xlim(13, 90)
## Warning: Removed 4906 rows containing missing values (geom_point).
Notes:
Notes:
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Summarise friend_count for each distinct age: mean, median and row count,
# then order the result by age.
age_groups <- group_by(pf, age)
pf.fc_by_age <- summarise(
  age_groups,
  friend_count_mean = mean(friend_count),
  # Column name corrected from the misspelled "friend_count_meadian" so it
  # matches the name used by the other summaries in this file.
  friend_count_median = median(friend_count),
  n = n()
)
pf.fc_by_age <- arrange(pf.fc_by_age, age)
Create your plot!
# Per-age summary of friend_count (mean, median, row count) built as one
# pipeline and sorted by age.
pf.fc_by_age <- pf %>%
  group_by(age) %>%
  summarise(
    friend_count_mean = mean(friend_count),
    friend_count_median = median(friend_count),
    n = n()
  ) %>%
  arrange(age)

# Print the first rows of the summary.
head(pf.fc_by_age)
## # A tibble: 6 × 4
## age friend_count_mean friend_count_median n
## <int> <dbl> <dbl> <int>
## 1 13 164.7500 74.0 484
## 2 14 251.3901 132.0 1925
## 3 15 347.6921 161.0 2618
## 4 16 351.9371 171.5 3086
## 5 17 350.3006 156.0 3283
## 6 18 331.1663 162.0 5196
Notes:
# Jittered scatter of friend_count against age (ages 30-90, square-root
# y axis) overlaid with per-age summary lines: the mean (default colour) and
# the 10%, 50% and 90% quantiles in blue.
#
# The three quantile layers previously all used probs = .1 and therefore drew
# the same line three times; the dashed / solid / dashed styling corresponds
# to the 10th percentile, the median and the 90th percentile.
#
# NOTE(review): `fun.y` is kept because the recorded output in this file comes
# from an older ggplot2; recent releases call this argument `fun`.
ggplot(aes(x = age, y = friend_count), data = pf) +
  xlim(30, 90) +
  geom_point(
    alpha = 0.05,
    # Jitter horizontally only; height = 0 leaves friend_count untouched.
    position = position_jitter(height = 0),
    color = "orange"
  ) +
  coord_trans(y = "sqrt") +
  geom_line(stat = "summary", fun.y = mean) +
  geom_line(
    stat = "summary", fun.y = quantile,
    fun.args = list(probs = .1),
    linetype = 2, color = "blue"
  ) +
  geom_line(
    stat = "summary", fun.y = quantile,
    fun.args = list(probs = .5),
    color = "blue"
  ) +
  geom_line(
    stat = "summary", fun.y = quantile,
    fun.args = list(probs = .9),
    linetype = 2, color = "blue"
  )
## Warning: Removed 56588 rows containing non-finite values (stat_summary).
## Warning: Removed 56588 rows containing non-finite values (stat_summary).
## Warning: Removed 56588 rows containing non-finite values (stat_summary).
## Warning: Removed 56588 rows containing non-finite values (stat_summary).
## Warning: Removed 57486 rows containing missing values (geom_point).
Response:
# Jittered scatter of friend_count against age, zoomed to ages 13-90 with
# coord_cartesian(), overlaid with per-age summary lines: the mean (default
# colour) and the 10%, 50% and 90% quantiles in blue.
#
# The three quantile layers previously all used probs = .1 and therefore drew
# the same line three times; the dashed / solid / dashed styling corresponds
# to the 10th percentile, the median and the 90th percentile.
#
# NOTE(review): `fun.y` is kept because the recorded output in this file comes
# from an older ggplot2; recent releases call this argument `fun`.
ggplot(aes(x = age, y = friend_count), data = pf) +
  coord_cartesian(xlim = c(13, 90)) +
  geom_point(
    alpha = 0.05,
    # Jitter horizontally only; height = 0 leaves friend_count untouched.
    position = position_jitter(height = 0),
    color = "orange"
  ) +
  geom_line(stat = "summary", fun.y = mean) +
  geom_line(
    stat = "summary", fun.y = quantile,
    fun.args = list(probs = .1),
    linetype = 2, color = "blue"
  ) +
  geom_line(
    stat = "summary", fun.y = quantile,
    fun.args = list(probs = .5),
    color = "blue"
  ) +
  geom_line(
    stat = "summary", fun.y = quantile,
    fun.args = list(probs = .9),
    linetype = 2, color = "blue"
  )
See the Instructor Notes of this video to download Moira’s paper on perceived audience size and to see the final plot.
Notes:
Notes:
# Pearson correlation between age and friend_count over all rows of pf;
# use = "all.obs" makes cor() raise an error if any value is missing.
with(pf, cor(x = age, y = friend_count, use = "all.obs", method = "pearson"))
## [1] -0.02740737
Look up the documentation for the cor.test function.
What’s the correlation between age and friend count? Round to three decimal places. Response:
Notes:
# Correlation test between age and friend_count, restricted to rows with
# age <= 70.
with(
  subset(pf, subset = age <= 70),
  cor.test(x = age, y = friend_count)
)
##
## Pearson's product-moment correlation
##
## data: age and friend_count
## t = -52.592, df = 91029, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1780220 -0.1654129
## sample estimates:
## cor
## -0.1717245
Notes:
Notes:
# likes_received against www_likes_received, zoomed so each axis runs from 0
# to that variable's 95th percentile, drawn with open circles and a red
# linear-model smoother.
ggplot(data = pf, mapping = aes(x = www_likes_received, y = likes_received)) +
  coord_cartesian(
    xlim = c(0, quantile(pf$www_likes_received, 0.95)),
    ylim = c(0, quantile(pf$likes_received, 0.95))
  ) +
  geom_point(shape = 1) +
  geom_smooth(method = "lm", color = "red")
Notes:
# Correlation between www_likes_received and likes_received over all rows.
cor(pf$www_likes_received, pf$likes_received)
## [1] 0.9479902
What’s the correlation between the two variables? Include the top 5% of values for the variable in the calculation and round to 3 decimal places.
Response:
Notes:
Notes:
# install.packages('alr3')
library(alr3)
## Loading required package: car
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
# Load the Mitchell data set into the workspace. The previous version routed
# this through a throwaway `temp` variable that was assigned twice and then
# removed without ever being used.
data(Mitchell)
Create your plot!
# Temp against Month as open circles, with x-axis breaks every 12 months
# from 0 up to 203.
ggplot(data = Mitchell, mapping = aes(x = Month, y = Temp)) +
  geom_point(shape = 1) +
  scale_x_continuous(breaks = seq(from = 0, to = 203, by = 12))

# Correlation between Month and Temp.
cor(Mitchell$Month, Mitchell$Temp)
## [1] 0.05747063
Take a guess for the correlation coefficient for the scatterplot.
What is the actual correlation of the two variables? (Round to the thousandths place)
# Fractional age: age plus the share of a year given by (1 - dob_month / 12).
pf$age_with_months <- with(pf, age + (1 - dob_month / 12))
Notes:
What do you notice? Response:
Watch the solution video and check out the Instructor Notes! Notes:
Notes:
# Summarise friend_count (mean, median, row count) for each distinct
# age_with_months value and sort the result by age_with_months.
age_with_months_groups <- group_by(pf, age_with_months)
pf.fc_by_age_months2 <- arrange(
  summarise(
    age_with_months_groups,
    friend_count_mean = mean(friend_count),
    friend_count_median = median(friend_count),
    n = n()
  ),
  age_with_months
)
library(dplyr)
# friend_count summary (mean, median, row count) per age_with_months value,
# built as one pipeline and sorted by age_with_months.
pf.fc_by_age_months <- pf %>%
  group_by(age_with_months) %>%
  summarise(
    friend_count_mean = mean(friend_count),
    friend_count_median = median(friend_count),
    n = n()
  ) %>%
  arrange(age_with_months)

# Print the first rows of the summary.
head(pf.fc_by_age_months)
## # A tibble: 6 × 4
## age_with_months friend_count_mean friend_count_median n
## <dbl> <dbl> <dbl> <int>
## 1 13.16667 46.33333 30.5 6
## 2 13.25000 115.07143 23.5 14
## 3 13.33333 136.20000 44.0 25
## 4 13.41667 164.24242 72.0 33
## 5 13.50000 131.17778 66.0 45
## 6 13.58333 156.81481 64.0 54
Programming Assignment
# Mean friend count against age_with_months, drawn as open circles.
ggplot(
  data = pf.fc_by_age_months,
  mapping = aes(x = age_with_months, y = friend_count_mean)
) +
  geom_point(shape = 1)
Notes:
# Mean friend count against age_with_months for rows below 71, drawn as a
# line with geom_smooth()'s default smoother layered on top.
ggplot(
  data = subset(pf.fc_by_age_months, age_with_months < 71),
  mapping = aes(x = age_with_months, y = friend_count_mean)
) +
  geom_line() +
  geom_smooth()
## `geom_smooth()` using method = 'loess'
Notes:
Reflection:
Click KnitHTML to see all of your hard work and to have an html page of this lesson, your answers, and your notes!